In this notebook, you will find common and useful visualizations of the data obtained in the Alkaloid extraction report.
This section contains the libraries required to conduct the data analysis and visualization parts of this notebook.
library(tidyverse)
library(plotly)
library(DT)
library(factoextra)
library(FactoMineR)
library(patchwork)
library(GGally)
Here, we are going to import the short version of the results provided by ms-mint.
# Load the short ms-mint results table (one row per sample file and peak).
alkaloids_area <- readr::read_csv(file = "Results/All_samples_fixed_rt.csv")
Now that we have our data in memory, we can display it.
## Rows: 3
## Columns: 6
## $ ...1 <dbl> 0, 1, 2
## $ ms_file_label <chr> "SPM_7_pos_20", "SPM_7_pos_20", "SPM_7_pos_20"
## $ peak_label <chr> "Dehydrotomatine", "alpha-Tomatine", "Tomatidine"
## $ mz_mean <dbl> 1034.5422, 1036.5613, 416.3531
## $ peak_area <dbl> 1425829, 5211881, 0
## $ Sample <chr> "SPM", "SPM", "SPM"
As we can see, the ms_file_label column holds the concatenated sample name,
which includes the Specie, Replicate,
Polarity, and Injection number. The last column,
Sample, shows the specie acronym. At this point, the
table is not intuitive, so we are going to tidy it up.
# Tidy the ms-mint results. `ms_file_label` (e.g. "SPM_7_pos_20") is split into
# Specie, Replicate, Polarity and InjectionNumber using separate()'s default
# separator, and the original label is kept (`remove = FALSE`).
# The result is stored as `alkaloids_clean`, the name every later step of the
# notebook reads.
alkaloids_clean <- alkaloids_area %>%
  separate(ms_file_label,
           into = c("Specie", "Replicate", "Polarity", "InjectionNumber"),
           remove = FALSE) %>%
  # Keep only the analysis columns, renaming the ms-mint ones on the way.
  # `Sample`, `Polarity` and the unnamed index column `...1` are dropped simply
  # by not being listed.
  select(ms_file_label, Specie, Replicate, InjectionNumber,
         TargetIon = mz_mean, Alkaloid = peak_label, Area = peak_area) %>%
  # Replicate becomes a factor with levels 1..10; a replicate label outside
  # that range would turn into NA.
  mutate(Replicate = factor(Replicate, levels = seq_len(10)))
Now that we have a cleaner table, we can display it with more descriptive column names.
Remember to use the search bar to look for specific results.
The first bar plot shows the absolute peak area quantified per sample and alkaloid.
# Stacked bar chart of absolute peak area: one bar per replicate, coloured by
# alkaloid, one panel per specie (five panels per row).
# Rows whose Specie is "OH8243Fruit" are excluded from the figure.
bar_alkaloids_absol <- ggplot(
  data = filter(alkaloids_clean, !(Specie %in% "OH8243Fruit")),
  mapping = aes(x = Replicate, y = Area, fill = Alkaloid)
) +
  geom_col() +
  facet_wrap(vars(Specie), ncol = 5) +
  theme_classic() +
  labs(
    title = "Barplot of peak area per alkaloid",
    x = "Replicate number",
    y = "Peak area"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bar_alkaloids_absol)
In contrast to the previous plot, where the bar height represents the area observed in each sample, the following plot represents the relative alkaloid content per sample. This plot is only meant for visual comparison within each sample, not between samples, since the bar height is normalized.
# Stacked bar chart of relative peak area: `position = "fill"` rescales every
# bar to the same height, so only the alkaloid proportions within a replicate
# are shown. One panel per specie (five panels per row).
# Rows whose Specie is "OH8243Fruit" are excluded from the figure.
bar_alkaloids_rel <- ggplot(
  data = filter(alkaloids_clean, !(Specie %in% "OH8243Fruit")),
  mapping = aes(x = Replicate, y = Area, fill = Alkaloid)
) +
  geom_col(position = "fill") +
  facet_wrap(vars(Specie), ncol = 5) +
  theme_classic() +
  labs(
    title = "Barplot of relative peak area per alkaloid",
    x = "Replicate number",
    y = "Relative peak area"
  )

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bar_alkaloids_rel)
The following section will be based on the following documentation.
# Reshape to wide format: each alkaloid becomes its own column of peak areas.
# TargetIon is removed first so it does not act as an identifying column, and
# alkaloids missing for a sample are filled with 0.
# Rows whose Specie is "OH8243Fruit" are excluded.
alkaloids_wide <- alkaloids_clean %>%
  filter(!(Specie %in% "OH8243Fruit")) %>%
  select(-TargetIon) %>%
  pivot_wider(
    names_from = Alkaloid,
    values_from = Area,
    values_fill = 0
  )

# Save the wide table next to the other results (no row-name column).
write.csv(
  alkaloids_wide,
  file = "Results/wide_Alkaloids_allSamples.csv",
  row.names = FALSE
)
Centered Not scaled
# Keep only the alkaloid peak-area columns (the contiguous range from
# Dehydrotomatine to LycoEscu_rt2p4) as input for the PCA.
alkaloids_only <- select(alkaloids_wide, Dehydrotomatine:LycoEscu_rt2p4)

# Fit the PCA without drawing FactoMineR's default graphs.
# NOTE(review): `scale.unit` is not set, so FactoMineR's default applies
# (documented as TRUE, i.e. variables scaled to unit variance) - confirm this
# is intended given the "Centered Not scaled" section heading.
alkaloids_pca <- PCA(alkaloids_only, graph = FALSE)

# Scree plot of the variance explained per component, with value labels.
fviz_screeplot(alkaloids_pca, ylim = c(0, 40), addlabels = TRUE)
# Variable-level PCA results (coordinates, contributions, ...).
vars_info <- get_pca_var(alkaloids_pca)

# Interactive table of each variable's contribution to the first four
# components, sorted from highest to lowest contribution (PC1, then PC2, PC3).
as.data.frame(vars_info$contrib) %>%
  select(PC1 = Dim.1, PC2 = Dim.2, PC3 = Dim.3, PC4 = Dim.4) %>%
  arrange(desc(PC1), desc(PC2), desc(PC3)) %>%
  DT::datatable()
# Percentage of variance explained by each component, read from the fitted PCA
# so the axis labels below can never drift away from the model.
pca_var_pct <- get_eigenvalue(alkaloids_pca)$variance.percent

# Variable (loading) plot on PC1/PC2, coloured by each variable's contribution.
fviz_pca_var(alkaloids_pca, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping
) +
  labs(x = sprintf("PC1 (%.1f%%)", pca_var_pct[1]),
       y = sprintf("PC2 (%.1f%%)", pca_var_pct[2]),
       color = "Contribution")
# Bar charts of the five variables contributing most to each of the first two
# components.
top5_pc1 <- fviz_contrib(alkaloids_pca, axes = 1, choice = "var", top = 5) # PC1
top5_pc2 <- fviz_contrib(alkaloids_pca, axes = 2, choice = "var", top = 5) # PC2

# patchwork: combine both charts into a single figure.
top5_pc1 + top5_pc2
# Coordinates of every sample (individual) on the retained components.
sample_scores <- as.data.frame(alkaloids_pca$ind$coord)

# Rename the components PC1, PC2, ... according to how many the PCA actually
# kept, instead of assuming there are exactly five.
names(sample_scores) <- paste0("PC", seq_len(ncol(sample_scores)))

# Attach the scores to the wide table; rows are matched by position.
alkaloids_scores <- bind_cols(alkaloids_wide, sample_scores)
# Pairwise scatter plots of the PCA scores.
# The score columns are located by name (PC1, PC2, ...) rather than by fixed
# position, so the plot keeps working if columns are added or removed upstream.
# Original author's note: colouring by Specie with a legend was not achieved.
pc_columns <- grep("^PC[0-9]+$", names(alkaloids_scores))
ggpairs(alkaloids_scores, columns = pc_columns,
        upper = list(continuous = "points", combo = "box_no_facet"),
        lower = list(continuous = "points", combo = "dot_no_facet"))
# Interactive PC1 vs PC2 score plot, coloured by specie. The hover text lists
# the replicate and the peak area of every alkaloid for the hovered sample.
# The trace type and mode are stated explicitly instead of being left for
# plotly to guess, and every hover label uses the same "name: value" layout.
alkaloids_scores %>%
  plot_ly() %>%
  add_trace(x = ~PC1, y = ~PC2, color = ~Specie,
            type = "scatter", mode = "markers",
            text = ~paste("Specie: ", Specie,
                          "<br>Replicate: ", Replicate,
                          "<br>Dehydrotomatine: ", Dehydrotomatine,
                          "<br>alpha-Tomatine: ", `alpha-Tomatine`,
                          "<br>Tomatidine: ", Tomatidine,
                          "<br>Hydroxytomatine_range: ", Hydroxytomatine_range,
                          "<br>Acetoxytomatine_II_rt4p4: ", Acetoxytomatine_II_rt4p4,
                          "<br>Acetoxytomatine_II_rt5p3: ", Acetoxytomatine_II_rt5p3,
                          "<br>Acetoxytomatine_II_rt5p4: ", Acetoxytomatine_II_rt5p4,
                          "<br>EsculeosideB_rt2p2: ", EsculeosideB_rt2p2,
                          "<br>EsculeosideB_rt2p3: ", EsculeosideB_rt2p3,
                          "<br>LycoEscu_rt3p3: ", LycoEscu_rt3p3,
                          "<br>LycoEscu_rt2p4: ", LycoEscu_rt2p4))
# Print the R version, platform and package versions used for this run.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS 13.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] GGally_2.1.2 patchwork_1.1.2 FactoMineR_2.8 factoextra_1.0.7
## [5] DT_0.28 plotly_4.10.2 lubridate_1.9.2 forcats_1.0.0
## [9] stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1 readr_2.1.4
## [13] tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2 tidyverse_2.0.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.6 sass_0.4.7 bit64_4.0.5
## [4] vroom_1.6.3 jsonlite_1.8.7 viridisLite_0.4.2
## [7] carData_3.0-5 bslib_0.5.0 highr_0.10
## [10] yaml_2.3.7 ggrepel_0.9.3 backports_1.4.1
## [13] pillar_1.9.0 lattice_0.21-8 glue_1.6.2
## [16] digest_0.6.33 ggsignif_0.6.4 RColorBrewer_1.1-3
## [19] colorspace_2.1-0 htmltools_0.5.5 plyr_1.8.8
## [22] pkgconfig_2.0.3 broom_1.0.5 xtable_1.8-4
## [25] mvtnorm_1.2-2 scales_1.2.1 tzdb_0.4.0
## [28] timechange_0.2.0 emmeans_1.8.7 car_3.1-2
## [31] generics_0.1.3 farver_2.1.1 ggpubr_0.6.0
## [34] ellipsis_0.3.2 cachem_1.0.8 withr_2.5.0
## [37] lazyeval_0.2.2 cli_3.6.1 magrittr_2.0.3
## [40] crayon_1.5.2 estimability_1.4.1 evaluate_0.21
## [43] fansi_1.0.4 MASS_7.3-60 rstatix_0.7.2
## [46] tools_4.2.0 data.table_1.14.8 hms_1.1.3
## [49] lifecycle_1.0.3 munsell_0.5.0 cluster_2.1.4
## [52] flashClust_1.01-2 compiler_4.2.0 jquerylib_0.1.4
## [55] multcompView_0.1-9 rlang_1.1.1 grid_4.2.0
## [58] rstudioapi_0.15.0 htmlwidgets_1.6.2 crosstalk_1.2.0
## [61] leaps_3.1 labeling_0.4.2 rmarkdown_2.23
## [64] gtable_0.3.3 abind_1.4-5 reshape_0.8.9
## [67] R6_2.5.1 knitr_1.43 fastmap_1.1.1
## [70] bit_4.0.5 utf8_1.2.3 stringi_1.7.12
## [73] parallel_4.2.0 Rcpp_1.0.11 vctrs_0.6.3
## [76] scatterplot3d_0.3-44 tidyselect_1.2.0 xfun_0.39